In [1]:
    
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
    
In [3]:
    
# Read the pickled embedding weights into `w`.
# NOTE(review): pickle.load can execute arbitrary code -- only open files you trust.
with open('../src/mane/prototype/embeddings/BC3047.weights', 'rb') as weights_file:
    w = p.load(weights_file)

# Build the graph object `bc` from the graph file and its community file.
bc = g.graph_from_pickle(
    '../src/mane/data/blogcatalog3.graph',
    '../src/mane/data/blogcatalog3.community',
)
    
In [4]:
    
# Average the two weight arrays element-wise, then rescale with sklearn's
# `normalize` (defaults: L2 norm, applied per row).
emb = normalize((w[0] + w[1]) / 2)
    
In [5]:
    
# Raw (un-normalized) sum of the two weight arrays, row 0.
np.add(w[0], w[1])[0]
    
    Out[5]:
In [6]:
    
# Normalize the summed row 0 explicitly instead of relying on IPython's `_`
# (the previous cell's output), which is undefined or stale when the notebook
# is re-run out of order or from a fresh kernel.
# `normalize` expects a 2-D array, so the single row is reshaped to (1, n_features).
normalize((w[0] + w[1])[0].reshape(1, -1))
    
    
    Out[6]:
In [7]:
    
# Show row 0 of the normalized embedding array, for comparison with the
# manually normalized row computed in the preceding cells.
emb[0]
    
    Out[7]:
In [9]:
    
# Ask the graph for four sequences: train ids, train labels, test ids, test labels.
# NOTE(review): 0.5 is presumably the train fraction -- confirm in graph.get_ids_labels.
# NOTE(review): if the split is random, no seed is set here, so later cells that
# hard-code node ids may not reproduce -- verify.
x_train, yl_train, x_test, yl_test = bc.get_ids_labels(0.5)
    
In [10]:
    
# Collect the embedding row of every training id, in the order of x_train.
X_train = []
for node_id in x_train:
    X_train.append(emb[node_id])

# Turn the per-node label collections into a binary indicator matrix
# (one column per distinct label).
label_binarizer = MultiLabelBinarizer()
Y_train = label_binarizer.fit_transform(yl_train)
    
In [11]:
    
# (number of training samples, number of distinct labels seen by the binarizer)
Y_train.shape
    
    Out[11]:
In [12]:
    
# Print every node whose community collection contains label 39.
# Reads the graph's private `_communities` mapping directly.
for node, labels in bc._communities.items():
    if 39 not in labels:
        continue
    print(node)
    
    
There are only 8 nodes in community 39. This might cause a problem when training a classifier for that label.
In [15]:
    
# Inspect the community labels stored for node 1465 (read from the private
# `_communities` mapping). Presumably one of the community-39 nodes -- verify.
bc._communities[1465]
    
    Out[15]:
In [65]:
    
# One-vs-rest wrapper: one binary logistic regression per label column.
# C is the inverse regularization strength, so 1e5 means very weak regularization.
base_estimator = LogisticRegression(C=1e5)
lg = OneVsRestClassifier(base_estimator)
    
In [66]:
    
# Train the one-vs-rest classifier on the training embeddings and the
# binarized labels; the cell output is the fitted estimator's repr.
lg.fit(X_train, Y_train)
    
    Out[66]:
In [33]:
    
# Predict the label indicator row for node 9566; the embedding is lifted to
# shape (1, n_features) because the classifier expects a 2-D batch.
lg.predict(emb[9566][np.newaxis, :])
    
    Out[33]:
In [30]:
    
# Inner product of embedding rows 5 and 0. The rows were L2-normalized when
# `emb` was built, so this equals their cosine similarity.
np.dot(emb[5], emb[0])
    
    Out[30]:
In [31]:
    
# Peek at the first training id returned by bc.get_ids_labels.
x_train[0]
    
    Out[31]:
In [32]:
    
# Peek at the second training id returned by bc.get_ids_labels.
x_train[1]
    
    Out[32]:
In [38]:
    
# Binarized label row of the training sample at position 8
# (position in x_train, not a node id).
Y_train[8]
    
    Out[38]:
In [39]:
    
# Per-label probabilities for node 1234 (input lifted to a 1-row batch).
lg.predict_proba(emb[1234][np.newaxis, :])
    
    Out[39]:
In [40]:
    
# Community labels stored for node 1234, to compare with the predicted
# probabilities for the same node.
bc._communities[1234]
    
    Out[40]:
In [56]:
    
# Column indices of the four highest-probability labels for node 1234,
# listed from lower to higher probability.
# NOTE(review): these are column positions in the binarized label matrix,
# which may not equal the community ids themselves -- verify.
np.argsort(lg.predict_proba(emb[1234][np.newaxis, :]))[0, -4:]
    
    Out[56]:
In [60]:
    
# All label columns for node 5437, ordered by increasing predicted probability.
np.argsort(lg.predict_proba(emb[5437][np.newaxis, :]))[0]
    
    Out[60]:
In [58]:
    
# Community labels stored for node 5437, to compare with the ranking
# produced by the classifier for the same node.
bc._communities[5437]
    
    Out[58]:
In [61]:
    
# Print the community labels of every node yielded by bc[5437]
# (presumably the neighbours of node 5437 -- verify against the graph class).
for neighbour in bc[5437]:
    neighbour_labels = bc._communities[neighbour]
    print(neighbour_labels)
    
    
In [62]:
    
# Show what the graph stores under key 5437
# (presumably the adjacency list of node 5437 -- verify against the graph class).
bc[5437]
    
    Out[62]:
In [72]:
    
# Among the nodes yielded by bc[7999], print those whose community
# collection contains label 32.
for neighbour in bc[7999]:
    if 32 not in bc._communities[neighbour]:
        continue
    print(neighbour)
    
    
In [77]:
    
# Column index of the single most probable label for node 6984.
np.argmax(lg.predict_proba(emb[6984][np.newaxis, :])[0])
    
    Out[77]:
In [78]:
    
# For each of these eight node ids (presumably the community-39 nodes found
# earlier -- verify), print a marker when the id is part of the training split.
for node in (14, 691, 1250, 1344, 1465, 1550, 4709, 7759):
    if node not in x_train:
        continue
    print('la')
    
    
In [ ]: